
library(haven)
library(openxlsx)
library(tidyr)
library(broom)
d <- read_dta("Desktop/PHD/Article projects/Datasets/ANESMicroFiles/Anes19482016_workingfile.dta")


# MUST RECODE INCOME GROUP 

table(d$income)
table(d$sex)
d$sex <- car::recode(d$sex,"1=1;2=0;else=NA")
cor(d$sex,d$incomegroup,use="complete.obs")
d_org <- d
table(d_org$year)

# 1964
sequence<-seq(1964,2012,2)
sequence <- sequence[!sequence==2006]
sequence <- sequence[!sequence==2010]
year <- NULL
covs <- NULL
covsdat <- data.frame(year,covs)
for(i in sequence){
d <- d_org %>% filter(year==i)
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
covs <- cov(d$score_inc,d$sex)
year <- i
dat <- data.frame(year,covs)
covsdat <- rbind(covsdat,dat)
}

m1 <- loess(covs~year,data=covsdat)
preds<-data.frame(year=seq(1960,2012,1))
preds$cov<-predict(m1,preds)
covariances_gender_income <- preds
write.xlsx(covariances_gender_income,"USA_covariances_gender_income.xlsx")

                                         